# Perform estimation of market models and imputes synthetic counterfactuals
# Setup ----
rm(list=ls())

library(tidyverse)
library(bizdays) # for computing business days
library(furrr) # parallelize map
library(tictoc)
`%!in%` <- Negate(`%in%`)
source('aux/estimation.R')

# ignore warnings from future about setting the seed in the LASSO - cause the seed is set by the user in any case
options(future.rng.onMisuse = 'ignore') 

# read prices and indexes data
data_base <- read_csv('data/prices.csv.gz')
indexes <- read_csv('data/indexes.csv.gz')
stoxx600 <- read_csv("data/indexes_stoxx600.csv.gz")

ols_indexes <- 2:7

# Main estimation window ----
# set events:
events <- c('package_2' = unique(data_base$package_2), 'package_3' = unique(data_base$package_3),
            'package_4' = unique(data_base$package_4), 
            'repowereu_1' = unique(data_base$repowereu_1), 'repowereu_2' = unique(data_base$repowereu_2),
            'repowereu_3' = unique(data_base$repowereu_3))

plan(multisession, workers = 6) # plan 6 R sessions to run in parallel

out_fit <- NULL
out_fit_OLS <- NULL
lasso_indexes <- 8:ncol(indexes)
data <- data_base

tic()
for (e in events) {
  ## LASSO ----
  # 'estimation()' is the function that does all the heavy lifting. It takes various inputs. A dataframe with the 
  # price series (the dataset must have a variable for the date called 'date' and one for returns called 'chg'). 
  # It takes a separate dataframe containing predictive indexes and a vector specifying the positions of the indexes to be used.
  # It takes an event date (in date format) and the distance of the estimation and event window start and stop days relative to the event.
  # The user can also choose whether they want to perform a LASSO estimation (default) or OLS. In case of a LASSO, the user should set a seed
  out <- future_map(.x = unique(data$ticker),
                    .f = estimation,
                    data = data, indexes = indexes, indexes_columns = lasso_indexes,
                    event_date = as.Date(e, origin = "1970-01-01 UTC"), type = 'LASSO', seed = 160493,
                    estim_start_days = -61L, estim_stop_days = -1L, event_start_days = 0, event_stop_days = 20L,
                    .progress = TRUE)
  
  ### Extract results ----
  est_fit <- map(.x = 1:length(out),
                 .f = function(x) out[[x]][[1]]) %>%
    bind_rows() %>%
    mutate(event = e,
           name_event = names(events)[events == e])
  
  est_val <- map(.x = 1:length(out),
                 .f = function(x) out[[x]][[2]]) %>%
    bind_rows() %>%
    mutate(event = e,
           name_event = names(events)[events == e])
  
  ### Add results to dataframe ----
  name <- names(events)[events == e]
  data <- data %>%
    left_join(est_val %>%
                select(ticker, date, fitted_LASSO, abnormal_LASSO) %>%
                rename_with(.fn = ~paste0('exp_chg_lasso_', name), .cols = fitted_LASSO) %>%
                rename_with(.fn = ~paste0('abn_chg_lasso_', name), .cols = abnormal_LASSO),
              by = c('ticker', 'date'))
  
  out_fit <- bind_rows(out_fit, est_fit)
  
  ## OLS ----
  cat("\nStart with OLS")
  out <- future_map(.x = unique(data$ticker),
                    .f = estimation,
                    data = data, indexes = indexes, indexes_columns = ols_indexes,
                    event_date = as.Date(e, origin = "1970-01-01 UTC"), type = 'OLS', seed = 160493,
                    estim_start_days = -61L, estim_stop_days = -1L, event_start_days = 0, event_stop_days = 20L,
                    .progress = TRUE)
  
  ### Extract results ----
  est_fit_OLS <- map(.x = 1:length(out),
                     .f = function(x) out[[x]][[1]]) %>%
    bind_rows() %>%
    mutate(event = e,
           name_event = names(events)[events == e])
  
  est_val_OLS <- map(.x = 1:length(out),
                     .f = function(x) out[[x]][[2]]) %>%
    bind_rows() %>%
    mutate(event = e,
           name_event = names(events)[events == e])
  
  ### Add results to dataframe ----
  name <- names(events)[events == e]
  data <- data %>%
    left_join(est_val_OLS %>%
                select(ticker, date, fitted_ols, abnormal_ols) %>%
                rename_with(.fn = ~paste0('exp_chg_ols_', name), .cols = fitted_ols) %>%
                rename_with(.fn = ~paste0('abn_chg_ols_', name), .cols = abnormal_ols),
              by = c('ticker', 'date'))
  
  out_fit_OLS <- bind_rows(out_fit_OLS, est_fit_OLS)
  
  cat('\nEvent:', name, 'done')
}
toc()
# It takes about 7 min with 6 sessions running in parallel

## Export ----
# add goodness of fit, reorder, and export
data <- data %>%
  group_by(ticker) %>%
  fill(company, NAICS_name, country_ex, country_hq, ex_name, .direction = 'down') %>%
  left_join(out_fit %>%
              as_tibble() %>%
              select(ticker, R2, name_event) %>%
              pivot_wider(id_cols = "ticker", names_from = "name_event", values_from = "R2",
                          names_prefix = "R2_lasso_"),
            by = "ticker")

colnames(data) 

# reorder columns so as to have variables related to the same event together:
cols_order <- unlist(lapply(c("package_2", "package_3", "package_4", "repowereu_1", "repowereu_2", "repowereu_3"), function(g) {
  paste0(c("exp_chg_lasso", "abn_chg_lasso", "R2_lasso"), "_", g)
}))

data <- data %>%
  relocate(all_of(cols_order), .after = Europe_traded)

data %>%
  write_csv(gzfile('data_out/analysis.csv.gz'))

# Shorter window length ----
out_fit <- NULL
lasso_indexes <- 8:ncol(indexes)
data <- data_base

tic()
for (e in events) {
  ## LASSO ----
  out <- future_map(.x = unique(data$ticker),
                    .f = estimation,
                    data = data, indexes = indexes, indexes_columns = lasso_indexes,
                    event_date = as.Date(e, origin = "1970-01-01 UTC"), type = 'LASSO', seed = 160493,
                    estim_start_days = -31L, estim_stop_days = -1L, event_start_days = 0, event_stop_days = 20L,
                    .progress = TRUE)
  
  ### Extract results ----
  est_fit <- map(.x = 1:length(out),
                 .f = function(x) out[[x]][[1]]) %>%
    bind_rows() %>%
    mutate(event = e,
           name_event = names(events)[events == e])
  
  est_val <- map(.x = 1:length(out),
                 .f = function(x) out[[x]][[2]]) %>%
    bind_rows() %>%
    mutate(event = e,
           name_event = names(events)[events == e])
  
  ### Add results to dataframe ----
  name <- names(events)[events == e]
  data <- data %>%
    left_join(est_val %>%
                select(ticker, date, fitted_LASSO, abnormal_LASSO) %>%
                rename_with(.fn = ~paste0('exp_chg_lasso_', name), .cols = fitted_LASSO) %>%
                rename_with(.fn = ~paste0('abn_chg_lasso_', name), .cols = abnormal_LASSO),
              by = c('ticker', 'date'))
  
  out_fit <- bind_rows(out_fit, est_fit)
  
  cat('\nEvent:', name, 'done')
}
toc()
# It takes about 4m with 6 sessions running in parallel

## Export ----
colnames(data)

# export main data
data %>%
  group_by(ticker) %>%
  fill(company, NAICS_name, country_ex, country_hq, ex_name, .direction = 'down') %>%
  write_csv(gzfile('data_out/analysis_31d.csv.gz'))

# Longer window length ----
out_fit <- NULL
lasso_indexes <- 8:ncol(indexes)
data <- data_base

tic()
for (e in events) {
  ## LASSO ----
  out <- future_map(.x = unique(data$ticker),
                    .f = estimation,
                    data = data, indexes = indexes, indexes_columns = lasso_indexes,
                    event_date = as.Date(e, origin = "1970-01-01 UTC"), type = 'LASSO', seed = 160493,
                    estim_start_days = -181L, estim_stop_days = -1L, event_start_days = 0, event_stop_days = 20L,
                    .progress = TRUE)
  
  ### Extract results ----
  est_fit <- map(.x = 1:length(out),
                 .f = function(x) out[[x]][[1]]) %>%
    bind_rows() %>%
    mutate(event = e,
           name_event = names(events)[events == e])
  
  est_val <- map(.x = 1:length(out),
                 .f = function(x) out[[x]][[2]]) %>%
    bind_rows() %>%
    mutate(event = e,
           name_event = names(events)[events == e])
  
  ### Add results to dataframe ----
  name <- names(events)[events == e]
  data <- data %>%
    left_join(est_val %>%
                select(ticker, date, fitted_LASSO, abnormal_LASSO) %>%
                rename_with(.fn = ~paste0('exp_chg_lasso_', name), .cols = fitted_LASSO) %>%
                rename_with(.fn = ~paste0('abn_chg_lasso_', name), .cols = abnormal_LASSO),
              by = c('ticker', 'date'))
  
  out_fit <- bind_rows(out_fit, est_fit)
  
  cat('\nEvent:', name, 'done')
}
toc()
# It takes about 5m with 6 sessions running in parallel

## Export ----
# export main data
data %>%
  group_by(ticker) %>%
  fill(company, NAICS_name, country_ex, country_hq, ex_name, .direction = 'down') %>%
  write_csv(gzfile('data_out/analysis_181d.csv.gz'))

# Stoxx 600 ----
out_fit <- NULL
lasso_indexes <- 2:ncol(stoxx600)
data <- data_base

tic()
for (e in events) {
  ## LASSO ----
  out <- future_map(.x = unique(data$ticker),
                    .f = estimation,
                    data = data, indexes = stoxx600, indexes_columns = lasso_indexes,
                    event_date = as.Date(e, origin = "1970-01-01 UTC"), type = 'LASSO', seed = 160493,
                    estim_start_days = -61L, estim_stop_days = -1L, event_start_days = 0, event_stop_days = 20L,
                    .progress = TRUE)
  
  ### Extract results ----
  est_fit <- map(.x = 1:length(out),
                 .f = function(x) out[[x]][[1]]) %>%
    bind_rows() %>%
    mutate(event = e,
           name_event = names(events)[events == e])
  
  est_val <- map(.x = 1:length(out),
                 .f = function(x) out[[x]][[2]]) %>%
    bind_rows() %>%
    mutate(event = e,
           name_event = names(events)[events == e])
  
  ### Add results to dataframe ----
  name <- names(events)[events == e]
  data <- data %>%
    left_join(est_val %>%
                select(ticker, date, fitted_LASSO, abnormal_LASSO) %>%
                rename_with(.fn = ~paste0('exp_chg_lasso_', name), .cols = fitted_LASSO) %>%
                rename_with(.fn = ~paste0('abn_chg_lasso_', name), .cols = abnormal_LASSO),
              by = c('ticker', 'date'))
  
  out_fit <- bind_rows(out_fit, est_fit)
  cat('\nEvent:', name, 'done')
}
toc()
# It takes about 4 min with 6 sessions running in parallel

## Export ----
# export main data
data %>%
  group_by(ticker) %>%
  fill(company, NAICS_name, country_ex, country_hq, ex_name, .direction = 'down') %>%
  write_csv(gzfile('data_out/analysis_stoxx600.csv.gz'))

#===== The End =====#